INTEGRANTES
| Nombre | Cédula |
|---|---|
| Valentina Vanessa Rodríguez Villamizar | 1010085748 |
| Carmen Daniela Zabaleta Cardeño | 1007706542 |
| Genaro Alfonso Aristizabal Echeverri | 1007706542 |
# Original base, downloaded from the DANE website (Colombian national
# statistics office): monthly exports of coffee, carbon, oil, ferronickel
# and non-traditional products. The first 12 rows of the sheet are headers.
base_compl <- read_excel("anexo-exportaciones-cafe-carbon-petroleo-ferroniquel-no-tradicionales-sep22.xlsx",skip=12) %>% clean_names()
# Keep only the columns for date (1) and the value of Colombian coffee
# exports in thousands of FOB dollars (3).
base <- base_compl[1:442, c(1,3)] %>%
rename("valor" = "miles_de_dolares_fob_3") %>%
na.omit() # drop the NA row present in the original sheet
# (fix: the original piped na.omit(base), referencing the variable being
# assigned; the stray argument was only harmless through lazy evaluation)
# Rows 13, 26, ... interleave yearly totals with the monthly data; build
# their indices and remove those observations from the data set.
x <- seq(13, 399, by=13)
base <- base[-(x), ]
exp_cafe <- data.frame(fecha=seq(as.Date("1992/1/1"),
as.Date("2022/9/1"), "months"),
valor=base[1:369, 2])
# Rescale: thousands of USD divided by 100,000, i.e. values are now in
# units of 10^8 USD (hundreds of millions of dollars).
exp_cafe$valor <- exp_cafe$valor/100000
serie_original <- ts(exp_cafe$valor, start=c(1992,1),
frequency = 12)
Ya que la serie no presenta estabilización en la varianza, se plantean modelos tanto para la serie sin transformar, como para la serie transformada.
# Plot the untransformed series; the variance visibly grows with the level.
serie_original %>% plot(main="Serie Original",
las=1)
# Model 1: exhaustive automatic ARIMA search on the untransformed series
# (stepwise search and likelihood approximation both disabled).
modelo1 <- auto.arima(serie_original, stepwise = FALSE,
approximation = FALSE)
# z tests on the estimated coefficients; all terms are significant.
modelo1 %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.319375 0.105850 3.0172 0.002551 **
## ma1 -0.752517 0.078045 -9.6421 < 2.2e-16 ***
## sar1 0.171720 0.052893 3.2466 0.001168 **
## sar2 0.175184 0.057002 3.0733 0.002117 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Chequeo de los supuestos del error del modelo
# Ljung-Box on the residuals: p = 0.018 < 0.05, so residual autocorrelation
# remains and the independence assumption is not satisfied.
modelo1 %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(2,0,0)[12]
## Q* = 35.504, df = 20, p-value = 0.01758
##
## Model df: 4. Total lags used: 24
# Shapiro-Wilk: p ~ 2.5e-10, normality of the residuals is rejected.
modelo1$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.94619, p-value = 2.504e-10
# Normal Q-Q plot of the residuals, with reference line.
modelo1$residuals %>% qqnorm()
modelo1$residuals %>% qqline()
# Grid search over the temporary-change decay parameter `delta` for tso(),
# selected by AIC. Left commented out because it is slow to run; the chosen
# value (0.85) is hard-coded below.
#delta <- seq(0.05, 0.95, 0.05)
#aic_1 <- vector()
#ljungbox1 <- vector()
#i = 0
#for(d in delta){
#i = i+1
#modelo_outl <- tso(serie_original, delta=d)
#aic_1[i] <- modelo_outl$fit$aic
#ljungbox1[i] <- checkresiduals(modelo_outl$fit,
#plot = FALSE)$p.value}
#aic1 <- which.min(aic_1)
#delta1 <- delta[aic1]
# Model 3: automatic outlier detection (tsoutliers::tso) on the original
# series. It finds two temporary changes (TC), two additive outliers (AO)
# and one level shift (LS).
modelo3 <- tso(serie_original, delta=0.85)
modelo3
## Series: serie_original
## Regression with ARIMA(1,1,1)(0,0,2)[12] errors
##
## Coefficients:
## ar1 ma1 sma1 sma2 TC65 TC228 AO292 AO300 LS355
## 0.5313 -0.9140 0.1606 0.2216 1.0415 1.1006 1.0420 2.1352 1.3424
## s.e. 0.0841 0.0476 0.0524 0.0588 0.2549 0.2657 0.2432 0.2386 0.2225
##
## sigma^2 = 0.08497: log likelihood = -65.01
## AIC=150.02 AICc=150.64 BIC=189.1
##
## Outliers:
## type ind time coefhat tstat
## 1 TC 65 1997:05 1.042 4.087
## 2 TC 228 2010:12 1.101 4.142
## 3 AO 292 2016:04 1.042 4.285
## 4 AO 300 2016:12 2.135 8.950
## 5 LS 355 2021:07 1.342 6.034
# Re-fit the same intervention model with TSA::arimax, entering each outlier
# as an explicit transfer function: the two TCs as AR(1)-decaying pulses,
# the AOs as pure pulses, and the 2021:07 level shift as a step indicator
# (354 zeros followed by 15 ones = 369 observations).
modelo3_val <- arimax(serie_original, order=c(1, 1, 1),
seasonal = list(order = c(0, 0, 2)),
xtransf=data.frame(
mayo1997a=1*(seq_along(serie_original) == 65),
diciembre2010a=1*(seq_along(serie_original) == 228),
abril2016a=1*(seq_along(serie_original) == 292),
dic2016a=1*(seq_along(serie_original) == 300),
julio2021a=1*c(rep(0,354),rep(1,15))),
transfer=list(c(1, 0),c(1, 0),c(0, 0),
c(0, 0), c(0, 0)))
# Coefficient significance for the intervention model; every term is
# significant at the 1% level.
modelo3_val %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.531266 0.075782 7.0105 2.375e-12 ***
## ma1 -0.919385 0.040271 -22.8301 < 2.2e-16 ***
## sma1 0.166607 0.052301 3.1856 0.001445 **
## sma2 0.235394 0.059494 3.9566 7.602e-05 ***
## mayo1997a-AR1 0.925959 0.040499 22.8639 < 2.2e-16 ***
## mayo1997a-MA0 1.009486 0.238366 4.2350 2.285e-05 ***
## diciembre2010a-AR1 0.892081 0.042689 20.8971 < 2.2e-16 ***
## diciembre2010a-MA0 1.088993 0.256988 4.2375 2.260e-05 ***
## abril2016a-MA0 1.032011 0.242480 4.2561 2.081e-05 ***
## dic2016a-MA0 2.140767 0.237546 9.0120 < 2.2e-16 ***
## julio2021a-MA0 1.337568 0.217949 6.1371 8.405e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Chequeo de los supuestos del error del modelo
# Ljung-Box p = 0.032: some residual autocorrelation remains even after
# modelling the outliers.
modelo3_val %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(0,0,2)[12]
## Q* = 23.898, df = 13, p-value = 0.03208
##
## Model df: 11. Total lags used: 24
# Shapiro-Wilk p = 0.003: normality is still rejected, though far less
# strongly than for modelo1 (W rises from 0.946 to 0.988).
modelo3_val$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.98764, p-value = 0.003146
modelo3_val$residuals %>% qqnorm()
modelo3_val$residuals %>% qqline()
# Estimate the Box-Cox parameter for variance stabilisation.
cafe_lambda <- BoxCox.lambda(serie_original)
cafe_lambda
## [1] 0.0220795
# Lambda of the Box-Cox transformation (~0.022, i.e. close to a log
# transform).
serie_boxcox <- BoxCox(serie_original,
lambda=cafe_lambda)
serie_boxcox %>% plot(main="Serie Transformada - boxcox",
las=1)
# Model 2: exhaustive automatic ARIMA search on the transformed series.
modelo2 <- auto.arima(serie_boxcox, stepwise = FALSE,
approximation = FALSE)
modelo2
## Series: serie_boxcox
## ARIMA(1,1,2)(2,0,0)[12]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sar2
## 0.8915 -1.3463 0.3583 0.2557 0.2395
## s.e. 0.0463 0.0777 0.0712 0.0512 0.0545
##
## sigma^2 = 0.04141: log likelihood = 64.68
## AIC=-117.35 AICc=-117.12 BIC=-93.9
# All coefficients are significant.
modelo2 %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.891467 0.046299 19.2545 < 2.2e-16 ***
## ma1 -1.346292 0.077718 -17.3228 < 2.2e-16 ***
## ma2 0.358295 0.071190 5.0330 4.829e-07 ***
## sar1 0.255694 0.051188 4.9952 5.878e-07 ***
## sar2 0.239483 0.054509 4.3934 1.116e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Chequeo de los supuestos del error del modelo
# Ljung-Box p = 0.17: no evidence of residual autocorrelation — the
# transformed series passes the independence check.
modelo2 %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,2)(2,0,0)[12]
## Q* = 24.707, df = 19, p-value = 0.1704
##
## Model df: 5. Total lags used: 24
# Shapiro-Wilk p = 0.37: normality of the residuals is not rejected.
modelo2$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.99551, p-value = 0.3699
modelo2$residuals %>% qqnorm()
modelo2$residuals %>% qqline()
# NOTE(review): the grid-search loop below is commented out, so these four
# initialisations are dead at this point (delta/aic_1/ljungbox1/i are
# re-assigned further down before being used).
delta <- seq(0.05, 0.95, 0.05)
aic_1 <- vector()
ljungbox1 <- vector()
i = 0
#for(d in delta){
#i = i+1
#modelo_outl <- tso(serie_boxcox, delta=d)
#aic_1[i] <- modelo_outl$fit$aic
#ljungbox1[i] <- checkresiduals(modelo_outl$fit,
#plot = FALSE)$p.value}
#aic2 <- which.min(aic_1)
#delta2 <- delta[19]
#delta2
# Model 4: outlier detection on the Box-Cox series with delta = 0.95
# (index 19 of the grid above).
modelo4 <- tso(serie_boxcox, delta=0.95)
# Validate with TSA::arimax, entering the detected outliers as pulse
# indicators at observations 30 (Jun 1994), 153 (Sep 2004) and
# 300 (Dec 2016).
modelo4_val <- arimax(serie_boxcox, order=c(0, 1, 1),
seasonal = list(order = c(2, 0, 0)),
xtransf=data.frame(
junio1994a=1*(seq_along(serie_boxcox) == 30),
sept2004a=1*(seq_along(serie_boxcox) == 153),
dic2016a=1*(seq_along(serie_boxcox) == 300)),
transfer=list(c(0, 0),c(0, 0),c(0, 0)))
modelo4_val %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.442708 0.055111 -8.0330 9.510e-16 ***
## sar1 0.247389 0.050367 4.9117 9.030e-07 ***
## sar2 0.255917 0.053891 4.7487 2.047e-06 ***
## junio1994a-MA0 0.076360 0.157202 0.4857 0.6271
## sept2004a-MA0 -0.627691 0.157343 -3.9893 6.627e-05 ***
## dic2016a-MA0 0.693060 0.156838 4.4189 9.918e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The junio1994a pulse is not significant (p = 0.627), so model 5 drops it
# and keeps only the Sep 2004 and Dec 2016 interventions.
modelo5_val <- arimax(serie_boxcox, order=c(0, 1, 1),
seasonal = list(order = c(2, 0, 0)),
xtransf=data.frame(
sept2004a=1*(seq_along(serie_boxcox) == 153),
dic2016a=1*(seq_along(serie_boxcox) == 300)),
transfer=list(c(0, 0),c(0, 0)))
modelo5_val %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.441900 0.054958 -8.0407 8.936e-16 ***
## sar1 0.246734 0.050316 4.9037 9.405e-07 ***
## sar2 0.257525 0.053732 4.7928 1.645e-06 ***
## sept2004a-MA0 -0.627473 0.157307 -3.9889 6.639e-05 ***
## dic2016a-MA0 0.693183 0.156805 4.4207 9.840e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Chequeo de los supuestos del error del modelo
# Ljung-Box p = 0.008: the residuals of model 5 still show autocorrelation
# (independence assumption fails).
modelo5_val %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(2,0,0)[12]
## Q* = 36.959, df = 19, p-value = 0.00803
##
## Model df: 5. Total lags used: 24
# Shapiro-Wilk p = 0.87: normality is comfortably not rejected.
modelo5_val$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.99757, p-value = 0.8696
modelo5_val$residuals %>% qqnorm()
modelo5_val$residuals %>% qqline()
# NOTE(review): these windows OVERLAP — the training set runs through
# 2022-08 while the "test" set starts in 2021-09, so the 13 test points are
# inside the training sample (data leakage). This explains the implausibly
# low test errors reported further down (MAPE < 1%). The non-overlapping
# split used later (train through 2021-09, test 2021-10..2022-09) is the
# correct construction; TODO: fix these boundaries and re-run this section.
train_md1 <- window(serie_original,start = c(1992,1),end =c(2022,8))
test_md1 <- window(serie_original,start = c(2021,9),end =c(2022,9))
# Refit the exhaustive auto.arima on the (almost complete) training window.
modelo_train1 <- auto.arima(train_md1, stepwise = FALSE, approximation = FALSE)
modelo_train1 %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.320480 0.107122 2.9917 0.002774 **
## ma1 -0.753234 0.078798 -9.5591 < 2.2e-16 ***
## sar1 0.171702 0.052958 3.2422 0.001186 **
## sar2 0.174890 0.057190 3.0581 0.002228 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Commented-out delta grid search for tso on the training window.
# NOTE(review): the loop references `train_banco`, a variable that does not
# exist in this script (likely copied from another analysis); it should be
# `train_md1`. The selected value noted below is 0.8, yet 0.85 is used.
#delta <- seq(0.1, 0.90, 0.1)
#aic_1 <- vector()
#ljungbox1 <- vector()
#i = 0
#for(d in delta){
#i = i+1
#modelo_outl <- tso(train_banco, delta=d)
#aic_1[i] <- modelo_outl$fit$aic
#ljungbox1[i] <- checkresiduals(modelo_outl$fit,
#plot = FALSE)$p.value
#}
#which.min(aic_1)
# 0.8
# Outlier model on the training window; detects the same five outliers as
# modelo3 on the full series.
modelo_train2 <- tso(train_md1, delta=0.85)
modelo_train2
## Series: train_md1
## Regression with ARIMA(1,1,1)(0,0,2)[12] errors
##
## Coefficients:
## ar1 ma1 sma1 sma2 TC65 TC228 AO292 AO300 LS355
## 0.5323 -0.9145 0.1623 0.2202 1.0418 1.0994 1.0444 2.1339 1.3392
## s.e. 0.0835 0.0471 0.0529 0.0590 0.2553 0.2661 0.2436 0.2389 0.2224
##
## sigma^2 = 0.08519: log likelihood = -65.29
## AIC=150.58 AICc=151.2 BIC=189.64
##
## Outliers:
## type ind time coefhat tstat
## 1 TC 65 1997:05 1.042 4.081
## 2 TC 228 2010:12 1.099 4.131
## 3 AO 292 2016:04 1.044 4.287
## 4 AO 300 2016:12 2.134 8.933
## 5 LS 355 2021:07 1.339 6.020
# Ljung-Box p = 0.039: mild residual autocorrelation remains.
modelo_train2$fit %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,2)[12] errors
## Q* = 25.911, df = 15, p-value = 0.03897
##
## Model df: 9. Total lags used: 24
# Shapiro-Wilk p = 0.002: normality rejected.
modelo_train2$fit$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.9867, p-value = 0.001872
modelo_train2$fit$residuals %>%qqnorm()
modelo_train2$fit$residuals %>%qqline()
# Forecast horizon of 22 months for both training-window models.
npred <- 22
# For model 1:
fore1 <- forecast(modelo_train1, h=npred)
# For model 2: extend the outlier regressors over the forecast horizon and
# keep only the rows beyond the training sample.
newxreg <- outliers.effects(modelo_train2$outliers,
length(train_md1) + npred)
# NOTE(review): the ts start is labelled c(2021,9), but these rows cover the
# 22 months AFTER the training window ends (i.e. from 2022-09). forecast()
# uses the rows positionally, so results are unaffected, but the label is
# misleading — confirm and correct.
newxreg <- ts(newxreg[-seq_along(train_md1),],
start = c(2021,9))
fore2 <- forecast(modelo_train2$fit, h=22,
xreg = newxreg)
# NOTE(review): because test_md1 lies inside the training window (see the
# split above), these "test" errors measure in-sample fit, not forecast
# accuracy.
accuracy(fore1, test_md1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01342229 0.34482450 0.24547997 -2.5693810 16.0815363 0.59856132
## Test set 0.02550935 0.02550935 0.02550935 0.7874534 0.7874534 0.06220022
## ACF1
## Training set -0.008655743
## Test set NA
accuracy(fore2, test_md1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.009478541 0.28788405 0.21593627 -2.543998 15.018364 0.5265240
## Test set 0.086736562 0.08673656 0.08673656 2.677489 2.677489 0.2114924
## ACF1
## Training set -0.04845442
## Test set NA
# Fitted values of both models against the observed training series.
df_train <- data.frame(fecha= exp_cafe$fecha[1:length(train_md1)], real=
exp_cafe$valor[1:length(train_md1)], pred1 = modelo_train1$fitted, pred2 = modelo_train2$fit$fitted)
df_train %>% ggplot(aes(x=fecha, y=real), col="black")+geom_line()+
geom_line(aes(x=fecha, y=pred1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=pred2),col="red", lty=3)
# NOTE(review): rows 348:369 are Dec-2020..Sep-2022, but fore1/fore2 start
# after the training window (2022-09); dates and forecasts appear
# misaligned here — verify before interpreting this plot.
df_test<- data.frame(fecha= exp_cafe$fecha[348:369], real= exp_cafe$valor[348:369], pred1 = fore1$mean, pred2 = fore2$mean, li1=fore1$lower[,2], ls1=fore1$upper[,2], li2=fore2$lower[,2], ls2=fore2$upper[,2])
df_test %>% ggplot(aes(x=fecha, y=real), col="black")+
geom_line()+
geom_line(aes(x=fecha, y=pred1),col="blue")+
geom_line(aes(x=fecha, y=li1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=ls1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=pred2),col="red", lty=3)+
geom_line(aes(x=fecha, y=li2),col="red", lty=3)+
geom_line(aes(x=fecha, y=ls2),col="red", lty=3)
# Predictions
# Model 1: model without intervention, on the original series.
exp_cafe %>% dim()
## [1] 369 2
exp_cafe %>% ggplot(aes(x=fecha, y=valor))+geom_line(col="blue")
exp_cafe %>% head(4)
## fecha valor
## 1 1992-01-01 1.0886426
## 2 1992-02-01 1.1479851
## 3 1992-03-01 0.8946446
## 4 1992-04-01 1.1353458
exp_cafe %>% tail(4)
## fecha valor
## 366 2022-06-01 3.525042
## 367 2022-07-01 3.791430
## 368 2022-08-01 3.083797
## 369 2022-09-01 3.239474
# NOTE(review): serie_original and modelo1 are rebuilt here identically to
# the beginning of the script (redundant recap for the report).
serie_original <- ts(exp_cafe$valor, start=c(1992,1),
frequency = 12)
modelo1 <- auto.arima(serie_original, stepwise = FALSE,
approximation = FALSE)
modelo1 %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.319375 0.105850 3.0172 0.002551 **
## ma1 -0.752517 0.078045 -9.6421 < 2.2e-16 ***
## sar1 0.171720 0.052893 3.2466 0.001168 **
## sar2 0.175184 0.057002 3.0733 0.002117 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 12-month-ahead point forecasts and standard errors (Oct 2022 - Sep 2023).
predict(modelo1, n.ahead = 12)
## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 2022
## 2023 3.492218 3.598275 3.660204 3.459469 3.163194 3.301049 3.683027 3.589094
## Sep Oct Nov Dec
## 2022 3.319030 3.411899 3.666277
## 2023 3.512954
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2022
## 2023 0.4457252 0.4638411 0.4808612 0.4971722 0.5129255 0.5281970 0.5430355
## Aug Sep Oct Nov Dec
## 2022 0.3467152 0.3985458 0.4253392
## 2023 0.5574780 0.5715553
# Proper (non-overlapping) split: hold out the final 12 months for testing.
n_obs <- length(serie_original)
tiempos <- time(serie_original)
train1 <- window(serie_original,
                 start = tiempos[1],
                 end = tiempos[n_obs - 12])
ts_info(train1)
## The train1 series is a ts object with 1 variable and 357 observations
## Frequency: 12
## Start time: 1992 1
## End time: 2021 9
test1 <- window(serie_original,
                start = tiempos[n_obs - 11],
                end = tiempos[n_obs])
ts_info(test1)
## The test1 series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2021 10
## End time: 2022 9
# Model 1: fit on the training window only.
modelo1.train <- auto.arima(train1, stepwise = FALSE,
approximation = FALSE)
# Ljung-Box p = 0.072: independence of the residuals is not rejected.
checkresiduals(modelo1.train)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,2)(2,0,0)[12]
## Q* = 28.625, df = 19, p-value = 0.07211
##
## Model df: 5. Total lags used: 24
# Assuming the model residuals come from a normal distribution, evaluate
# forecast accuracy with the accuracy() function from the forecast package.
fore1 <- forecast(modelo1.train, h=12)
accuracy(fore1, test1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01943998 0.3368295 0.2350374 -2.79167 16.00863 0.6036887
## Test set 0.96767136 1.0258021 0.9676714 28.32792 28.32792 2.4854436
## ACF1 Theil's U
## Training set 0.01040360 NA
## Test set 0.08842473 1.932304
test_forecast(actual = serie_original, forecast.obj = fore1,
test = test1)
# Naive benchmark (last observed value carried forward).
naive_model1 <- naive(train1, h = 12)
test_forecast(actual = serie_original,
forecast.obj = naive_model1,
test = test1)
# Accuracy of the naive benchmark (the seasonal naive follows below).
# Note: it beats the ARIMA on the test window (MAPE 22.5 vs 28.3).
accuracy(naive_model1, test1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.004145292 0.383093 0.2711962 -2.561313 18.67620 0.6965617
## Test set 0.785512707 0.866986 0.7855127 22.499882 22.49988 2.0175729
## ACF1 Theil's U
## Training set -0.24985968 NA
## Test set -0.01057417 1.599156
# Seasonal naive benchmark (same month of the previous year).
snaive_model1 <- snaive(train1, h = 12)
test_forecast(actual = serie_original,
forecast.obj = snaive_model1,
test = test1)
accuracy(snaive_model1, test1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04460802 0.5375893 0.3893355 -2.731798 25.72868 1.000000
## Test set 1.01835925 1.1819173 1.0296584 30.612100 30.97850 2.644656
## ACF1 Theil's U
## Training set 0.5861192 NA
## Test set 0.2076274 2.27936
# Model 2: tso of the original series (training window). Grid search over
# delta, recording AIC and the Ljung-Box p-value of each fit; one Ljung-Box
# block is printed per delta value.
delta <- seq(0.1, 0.90, 0.1)
aic_1 <- vector()
ljungbox1 <- vector()
i = 0
for(d in delta){
i = i+1
modelo_outl <- tso(train1, delta=d)
aic_1[i] <- modelo_outl$fit$aic
ljungbox1[i] <- checkresiduals(modelo_outl$fit,
plot = FALSE)$p.value
}
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1)(0,0,2)[12] errors
## Q* = 24.193, df = 15, p-value = 0.06188
##
## Model df: 9. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1)(1,0,0)[12] errors
## Q* = 35.6, df = 13, p-value = 0.0006844
##
## Model df: 11. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1)(1,0,0)[12] errors
## Q* = 35.884, df = 13, p-value = 0.0006185
##
## Model df: 11. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,0)(1,0,0)[12] errors
## Q* = 30.878, df = 14, p-value = 0.005765
##
## Model df: 10. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1)(0,0,2)[12] errors
## Q* = 27.375, df = 15, p-value = 0.02583
##
## Model df: 9. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,0,1)(0,0,2)[12] errors
## Q* = 28.219, df = 15, p-value = 0.02024
##
## Model df: 9. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(0,0,2)[12] errors
## Q* = 39.245, df = 16, p-value = 0.001002
##
## Model df: 8. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,1)[12] errors
## Q* = 39.207, df = 13, p-value = 0.0001853
##
## Model df: 11. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,2)[12] errors
## Q* = 21.854, df = 15, p-value = 0.1117
##
## Model df: 9. Total lags used: 24
# The AIC-minimising delta is 0.8 (index 8).
which.min(aic_1)
## [1] 8
delta[8]
## [1] 0.8
# NOTE(review): the chosen delta also has the WORST Ljung-Box p-value of
# the grid (0.00019) — AIC and residual independence pull in opposite
# directions here.
ljungbox1[8]
## [1] 0.0001852894
# Outlier model on train1 with the AIC-selected delta; detects 8 outliers.
modelo_train2 <- tso(train1, delta=0.8)
modelo_train2
## Series: train1
## Regression with ARIMA(1,1,1)(0,0,1)[12] errors
##
## Coefficients:
## ar1 ma1 sma1 LS30 TC65 AO72 TC228 AO292 AO300
## 0.3782 -0.8391 0.1620 0.9238 1.0114 0.9032 1.1518 1.1634 2.0628
## s.e. 0.0931 0.0596 0.0508 0.2150 0.2419 0.2320 0.2489 0.2340 0.2330
## TC353 TC355
## -1.2057 1.5112
## s.e. 0.2664 0.2645
##
## sigma^2 = 0.07461: log likelihood = -38.01
## AIC=100.01 AICc=100.92 BIC=146.51
##
## Outliers:
## type ind time coefhat tstat
## 1 LS 30 1994:06 0.9238 4.297
## 2 TC 65 1997:05 1.0114 4.180
## 3 AO 72 1997:12 0.9032 3.892
## 4 TC 228 2010:12 1.1518 4.628
## 5 AO 292 2016:04 1.1634 4.972
## 6 AO 300 2016:12 2.0628 8.851
## 7 TC 353 2021:05 -1.2057 -4.526
## 8 TC 355 2021:07 1.5112 5.714
# Ljung-Box p = 0.0002: the residuals are clearly autocorrelated —
# independence fails for this model.
modelo_train2$fit %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,1)[12] errors
## Q* = 39.207, df = 13, p-value = 0.0001853
##
## Model df: 11. Total lags used: 24
# Shapiro-Wilk p = 0.20 and Jarque-Bera p = 0.10: normality not rejected.
modelo_train2$fit$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.99422, p-value = 0.1956
modelo_train2$fit$residuals %>% jarque.bera.test()
##
## Jarque Bera Test
##
## data: .
## X-squared = 4.5683, df = 2, p-value = 0.1019
# Extend the outlier regressors 12 months past the training window and keep
# only the out-of-sample rows (starting at Oct 2021, the first test month).
npred <- 12
newxreg <- outliers.effects(modelo_train2$outliers,
length(train1) + npred)
newxreg <- ts(newxreg[-seq_along(train1),],
start =time(serie_original)[length(serie_original)
- 12 + 1])
fore2 <- forecast(modelo_train2$fit, h=12,
xreg = newxreg)
# Out-of-sample comparison: the outlier model (fore2) is slightly better
# than the plain ARIMA (fore1) on every test metric, but both have large
# test MAPE (~28%).
accuracy(fore1, test1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01943998 0.3368295 0.2350374 -2.79167 16.00863 0.6036887
## Test set 0.96767136 1.0258021 0.9676714 28.32792 28.32792 2.4854436
## ACF1 Theil's U
## Training set 0.01040360 NA
## Test set 0.08842473 1.932304
accuracy(fore2, test1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002795297 0.2685198 0.2086785 -2.909939 15.08438 0.5359865
## Test set 0.950146996 1.0193111 0.9501470 27.501520 27.50152 2.4404326
## ACF1 Theil's U
## Training set -0.024004677 NA
## Test set 0.002255653 1.886619
# Fitted values of both models against the observed training data.
df_train <- data.frame(fecha=
exp_cafe$fecha[1:length(train1)],
real=
exp_cafe$valor[1:length(train1)],
pred1 = modelo1.train$fitted,
pred2 = modelo_train2$fit$fitted)
df_train %>% ggplot(aes(x=fecha, y=real), col="black")+
geom_line()+
geom_line(aes(x=fecha, y=pred1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=pred2),col="red", lty=3)
# Test window (rows 358:369 = Oct 2021 .. Sep 2022): forecasts and 95%
# intervals for both models against the actuals.
df_test <- data.frame(fecha=
exp_cafe$fecha[358:369],
real=
exp_cafe$valor[358:369],
pred1 = fore1$mean, pred2 = fore2$mean,
li1=fore1$lower[,2], ls1=fore1$upper[,2],
li2=fore2$lower[,2], ls2=fore2$upper[,2])
df_test %>% ggplot(aes(x=fecha, y=real), col="black")+
geom_line()+
geom_line(aes(x=fecha, y=pred1),col="blue")+
geom_line(aes(x=fecha, y=li1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=ls1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=pred2),col="red", lty=3)+
geom_line(aes(x=fecha, y=li2),col="red", lty=3)+
geom_line(aes(x=fecha, y=ls2),col="red", lty=3)
##------------------------------------------------------------------------
# Box-Cox branch: modelo2 is re-fit here identically to the earlier fit
# (recap for the report).
modelo2 <- auto.arima(serie_boxcox, stepwise = FALSE,
approximation = FALSE)
modelo2 %>% coeftest()
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.891467 0.046299 19.2545 < 2.2e-16 ***
## ma1 -1.346292 0.077718 -17.3228 < 2.2e-16 ***
## ma2 0.358295 0.071190 5.0330 4.829e-07 ***
## sar1 0.255694 0.051188 4.9952 5.878e-07 ***
## sar2 0.239483 0.054509 4.3934 1.116e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 12-month-ahead forecasts — note these are on the Box-Cox scale; apply
# InvBoxCox(..., cafe_lambda) to read them in the original units.
predict(modelo2, n.ahead = 12)
## $pred
## Jan Feb Mar Apr May Jun Jul
## 2022
## 2023 1.1774733 1.2094798 1.2205919 1.1134392 0.8848705 0.9313140 1.1894587
## Aug Sep Oct Nov Dec
## 2022 1.1413586 1.1771633 1.2812369
## 2023 1.1392396 1.0940245
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2022
## 2023 0.2694527 0.2825905 0.2932542 0.3020354 0.3093548 0.3155220 0.3207695
## Aug Sep Oct Nov Dec
## 2022 0.2035052 0.2317830 0.2529716
## 2023 0.3252755 0.3291787
# Same 357/12 split for the Box-Cox-transformed series: final 12 months
# held out for testing.
n_bc <- length(serie_boxcox)
t_bc <- time(serie_boxcox)
train2 <- window(serie_boxcox,
                 start = t_bc[1],
                 end = t_bc[n_bc - 12])
ts_info(train2)
## The train2 series is a ts object with 1 variable and 357 observations
## Frequency: 12
## Start time: 1992 1
## End time: 2021 9
test2 <- window(serie_boxcox,
                start = t_bc[n_bc - 11],
                end = t_bc[n_bc])
ts_info(test2)
## The test2 series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2021 10
## End time: 2022 9
# Box-Cox model, training fit (the original comment labelled this
# "modelo 3", but the object is modelo2.train).
modelo2.train <- auto.arima(train2, stepwise = FALSE,
approximation = FALSE)
# Ljung-Box p = 0.22: residual independence is not rejected.
checkresiduals(modelo2.train)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,2)(2,0,0)[12]
## Q* = 23.292, df = 19, p-value = 0.2247
##
## Model df: 5. Total lags used: 24
# Assuming the model residuals come from a normal distribution, evaluate
# forecast accuracy with the accuracy() function from the forecast package.
fore3 <- forecast(modelo2.train, h=12)
accuracy(fore3, test2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01026467 0.2028746 0.1575465 -7.582076 89.37465 0.6028059
## Test set 0.34990784 0.3738558 0.3499078 28.622758 28.62276 1.3388205
## ACF1 Theil's U
## Training set 0.007388646 NA
## Test set 0.326057153 2.265063
test_forecast(actual = serie_boxcox, forecast.obj = fore3,
test = test2)
# Naive benchmark on the transformed series.
naive_model1 <- naive(train2, h = 12)
test_forecast(actual = serie_boxcox,
forecast.obj = naive_model1,
test = test2)
# NOTE(review): fore3_1 duplicates fore3 and is never used afterwards —
# dead assignment.
fore3_1 <- forecast(modelo2.train, h=12)
# Accuracy of the naive benchmark (seasonal naive follows below); again it
# beats the ARIMA on the test window.
accuracy(naive_model1, test2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002434154 0.2374662 0.1847410 -27.19072 113.36273 0.7068577
## Test set 0.267508882 0.2907778 0.2675089 21.22684 21.22684 1.0235448
## ACF1 Theil's U
## Training set -0.25572855 NA
## Test set -0.01514011 1.690385
# Seasonal naive benchmark.
snaive_model1 <- snaive(train2, h = 12)
test_forecast(actual = serie_boxcox,
forecast.obj = snaive_model1,
test = test2)
accuracy(snaive_model1, test2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02669156 0.3366235 0.2613553 19.76950 142.80606 1.000000
## Test set 0.41672873 0.5238508 0.4204452 34.91805 35.24398 1.608711
## ACF1 Theil's U
## Training set 0.6435311 NA
## Test set 0.2777256 3.258748
# Model 4: tso of the Box-Cox series (training window) — the original
# comment said "original series". Grid search over delta as before.
delta <- seq(0.1, 0.90, 0.1)
aic_1 <- vector()
ljungbox1 <- vector()
i = 0
for(d in delta){
i = i+1
modelo_outl <- tso(train2, delta=d)
aic_1[i] <- modelo_outl$fit$aic
ljungbox1[i] <- checkresiduals(modelo_outl$fit,
plot = FALSE)$p.value
}
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(2,0,0)[12] errors
## Q* = 34.672, df = 18, p-value = 0.01039
##
## Model df: 6. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(2,0,0)[12] errors
## Q* = 38.057, df = 17, p-value = 0.002414
##
## Model df: 7. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(2,0,0)[12] errors
## Q* = 38.229, df = 17, p-value = 0.002286
##
## Model df: 7. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(0,0,2)[12] errors
## Q* = 48.067, df = 18, p-value = 0.0001472
##
## Model df: 6. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(2,0,0)[12] errors
## Q* = 34.672, df = 18, p-value = 0.01039
##
## Model df: 6. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(2,0,0)[12] errors
## Q* = 34.672, df = 18, p-value = 0.01039
##
## Model df: 6. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,1)(2,0,0)[12] errors
## Q* = 34.672, df = 18, p-value = 0.01039
##
## Model df: 6. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,2)[12] errors
## Q* = 34.79, df = 18, p-value = 0.01005
##
## Model df: 6. Total lags used: 24
##
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,2)[12] errors
## Q* = 33.497, df = 18, p-value = 0.01452
##
## Model df: 6. Total lags used: 24
# NOTE(review): which.min reports index 2 (delta = 0.2), yet the code
# inspects delta[8] and fits with delta = 0.8 below. If 0.8 was chosen
# deliberately (e.g. a Ljung-Box trade-off), document why; otherwise this
# looks like an index copied from the previous grid search.
which.min(aic_1)
## [1] 2
delta[8]
## [1] 0.8
ljungbox1[8]
## [1] 0.0100455
# Outlier model on the Box-Cox training series; only 2 outliers remain
# after the transformation (LS 1994:06 and AO 2016:12).
# NOTE(review): this re-uses the name modelo_train2, shadowing the
# original-scale model fitted earlier — consider a distinct name.
modelo_train2 <- tso(train2, delta=0.8)
modelo_train2
## Series: train2
## Regression with ARIMA(1,1,1)(0,0,2)[12] errors
##
## Coefficients:
## ar1 ma1 sma1 sma2 LS30 AO300
## 0.3462 -0.7779 0.2238 0.2213 0.6657 0.7133
## s.e. 0.0959 0.0685 0.0553 0.0581 0.1612 0.1643
##
## sigma^2 = 0.03996: log likelihood = 70.01
## AIC=-126.01 AICc=-125.69 BIC=-98.89
##
## Outliers:
## type ind time coefhat tstat
## 1 LS 30 1994:06 0.6657 4.129
## 2 AO 300 2016:12 0.7133 4.341
# Ljung-Box p = 0.01: some residual autocorrelation remains.
modelo_train2$fit %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1)(0,0,2)[12] errors
## Q* = 34.79, df = 18, p-value = 0.01005
##
## Model df: 6. Total lags used: 24
# Shapiro-Wilk p = 0.51 and Jarque-Bera p = 0.28: normality not rejected.
modelo_train2$fit$residuals %>% shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.99603, p-value = 0.5127
modelo_train2$fit$residuals %>% jarque.bera.test()
##
## Jarque Bera Test
##
## data: .
## X-squared = 2.5234, df = 2, p-value = 0.2832
# Extend the outlier regressors 12 months past the training window and keep
# only the out-of-sample rows (starting at Oct 2021).
npred <- 12
newxreg <- outliers.effects(modelo_train2$outliers,
length(train2) + npred)
newxreg <- ts(newxreg[-seq_along(train2),],
start =time(serie_boxcox)[length(serie_boxcox)
- 12 + 1])
fore4 <- forecast(modelo_train2$fit, h=12,
xreg = newxreg)
# Out-of-sample comparison on the Box-Cox scale: the outlier model (fore4)
# edges out the plain ARIMA (fore3) on every test metric.
accuracy(fore3, test2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01026467 0.2028746 0.1575465 -7.582076 89.37465 0.6028059
## Test set 0.34990784 0.3738558 0.3499078 28.622758 28.62276 1.3388205
## ACF1 Theil's U
## Training set 0.007388646 NA
## Test set 0.326057153 2.265063
accuracy(fore4, test2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001144921 0.1979330 0.1553162 -13.96856 95.36255 0.5942721
## Test set 0.320349251 0.3360762 0.3203493 26.00449 26.00449 1.2257231
## ACF1 Theil's U
## Training set -0.01238752 NA
## Test set 0.06284670 1.984962
# Training plot: fitted values are back-transformed with InvBoxCox so they
# are comparable with the original-scale actuals.
df_train <- data.frame(fecha=
exp_cafe$fecha[1:length(train2)],
real=
exp_cafe$valor[1:length(train2)],
pred1 = InvBoxCox(modelo2.train$fitted, lambda = cafe_lambda),
pred2 = InvBoxCox(modelo_train2$fit$fitted, lambda = cafe_lambda))
df_train %>% ggplot(aes(x=fecha, y=real), col="black")+
geom_line()+
geom_line(aes(x=fecha, y=pred1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=pred2),col="red", lty=3)
# NOTE(review): unlike df_train above, the forecasts and interval bounds
# here are NOT back-transformed — `real` is on the original scale while
# pred1/pred2/li*/ls* are on the Box-Cox scale, so this plot compares
# incompatible units. Apply InvBoxCox(..., lambda = cafe_lambda) to the
# forecast columns before plotting.
df_test <- data.frame(fecha=
exp_cafe$fecha[358:369],
real=
exp_cafe$valor[358:369],
pred1 = fore3$mean, pred2 = fore4$mean,
li1=fore3$lower[,2], ls1=fore3$upper[,2],
li2=fore4$lower[,2], ls2=fore4$upper[,2])
df_test %>% ggplot(aes(x=fecha, y=real), col="black")+
geom_line()+
geom_line(aes(x=fecha, y=pred1),col="blue")+
geom_line(aes(x=fecha, y=li1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=ls1),col="blue", lty=2)+
geom_line(aes(x=fecha, y=pred2),col="red", lty=3)+
geom_line(aes(x=fecha, y=li2),col="red", lty=3)+
geom_line(aes(x=fecha, y=ls2),col="red", lty=3)